
This is where I have to tell a little story first.
https://github.com/dboyliao/tf_snippets/blob/master/labs/batch_normalization.ipynb
https://github.com/dboyliao/tf_snippets/blob/master/labs/batch_normalization_v2.ipynb
from tensorflow.contrib.layers import batch_norm
(Comparison figures not recovered: Dboy's hand-rolled implementation vs. tf.contrib's batch_norm.)
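For context, the contrib layer was dropped into a graph roughly like this (a minimal TF 1.x sketch; the placeholder x below is mine, not from the notebooks):
x = tf.placeholder(tf.float32, shape=[None, 200])
# is_training must be switched to False at inference time so the stored
# moving averages are used instead of the current batch statistics.
normed = batch_norm(x, center=True, scale=True, is_training=True,
                    updates_collections=None)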
Always write down the exact tf/keras version pair your code was written against. The API churn may not be as brutal now as it once was, but this habit will save you a lot of trouble later.
import numpy as np
import keras
import tensorflow as tf
tf.__version__, keras.__version__
Using TensorFlow backend.
('1.3.0', '2.0.8')
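A lightweight way to enforce that pairing at the top of a notebook (my sketch, not from the original):
# Fail fast if the notebook is run against a different tf/keras pairing
# than the one it was written and tested with.
assert (tf.__version__, keras.__version__) == ('1.3.0', '2.0.8'), \
    (tf.__version__, keras.__version__)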
from keras.layers import Input, Dense, Activation
from keras.models import Model, Sequential
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import to_categorical
# multilayer logistic
graph = tf.Graph()
with graph.as_default():
    tf_images_batch = tf.placeholder(tf.float32, shape=[None, 28*28], name="images_batch")
    tf_target_prob = tf.placeholder(tf.float32, shape=[None, 10], name="target_prob")
    # Layer 1
    tf_W1 = tf.Variable(0.05*np.random.randn(28*28, 200),
                        name="W1",
                        dtype=tf.float32)
    tf_b1 = tf.Variable(np.zeros(200), dtype=tf.float32, name="b1")
    tf_zscore1 = tf.nn.bias_add(tf.matmul(tf_images_batch, tf_W1),
                                tf_b1,
                                name="zscore_1")
    tf_act1 = tf.nn.sigmoid(tf_zscore1, name="activation_1")
    # Layer 2
    tf_W2 = tf.Variable(0.05*np.random.randn(200, 10),
                        name="W2",
                        dtype=tf.float32)
    tf_b2 = tf.Variable(np.zeros(10), dtype=tf.float32, name="b2")
    tf_zscore2 = tf.nn.bias_add(tf.matmul(tf_act1, tf_W2), tf_b2, name="zscore_2")
    tf_prob = tf.nn.softmax(tf_zscore2, 1)
    # Cross-Entropy Loss
    tf_loss = -tf.reduce_mean(tf_target_prob*tf.log(tf_prob))
    # training
    train_op = tf.train.GradientDescentOptimizer(1e-1).minimize(tf_loss)
    saver = tf.train.Saver()
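    # A numerically safer alternative (my sketch, not in the original notebook):
    # -reduce_mean(target*log(prob)) averages over batch*10 entries (so it is
    # 1/10 of the usual per-example cross-entropy) and hits log(0) when the
    # softmax saturates; computing the loss from logits avoids both issues.
    tf_loss_safe = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_target_prob,
                                                logits=tf_zscore2))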
multilayer_logistic = [Dense(200, activation='sigmoid', name='Layer_1', input_shape=(28*28,)),
                       Dense(10, activation='softmax', name='Layer_2')]
model = Sequential(multilayer_logistic)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
Layer_1 (Dense)              (None, 200)               157000
_________________________________________________________________
Layer_2 (Dense)              (None, 10)                2010
=================================================================
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
_________________________________________________________________
input_layer = Input(name='input0', shape=(28*28,), dtype='float32')
fc_layer_1 = Dense(200, activation='relu', name='Layer_1')(input_layer)
fc_layer_2 = Dense(10, activation='softmax', name='Layer_2')(fc_layer_1)
model = Model(inputs=[input_layer], outputs=[fc_layer_2])
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input0 (InputLayer)          (None, 784)               0
_________________________________________________________________
Layer_1 (Dense)              (None, 200)               157000
_________________________________________________________________
Layer_2 (Dense)              (None, 10)                2010
=================================================================
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
_________________________________________________________________
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
batch_size=100
num_epochs = 50
log_every_epoch = 10
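The accuracy helper used in the loop below isn't defined anywhere in this section; here is a minimal sketch consistent with how it's called (it must return a percentage):
def accuracy(pred_labels, true_labels):
    # Percentage of predictions that match the ground-truth labels.
    return 100.0 * np.mean(pred_labels == true_labels)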
with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    for epoch in range(1, num_epochs+1):
        current_epoch = fashion_mnist.train.epochs_completed
        while fashion_mnist.train.epochs_completed <= current_epoch:
            images_batch, labels_batch = fashion_mnist.train.next_batch(batch_size)
            feed_dict = {tf_images_batch: images_batch,
                         tf_target_prob: labels_batch}
            _, l = sess.run([train_op, tf_loss], feed_dict=feed_dict)
        else:
            if epoch % log_every_epoch == 0:
                print("epoch: ", epoch)
                print("current training loss:", l)
                feed_dict = {tf_images_batch: fashion_mnist.test.images}
                prob = sess.run(tf_prob, feed_dict=feed_dict)
                acc = accuracy(np.argmax(prob, axis=1),
                               np.argmax(fashion_mnist.test.labels, axis=1))
                print("testing acc: {:.2f}%".format(acc))
                chkp_path = saver.save(sess,
                                       global_step=epoch,
                                       save_path="models_chkp/fashion_mnist_chkp-{:.4f}".format(acc))
epoch:  10
current training loss: 0.0527372
testing acc: 76.12%
epoch:  20
current training loss: 0.0660421
testing acc: 79.88%
epoch:  30
current training loss: 0.053833
testing acc: 81.57%
epoch:  40
current training loss: 0.0458876
testing acc: 82.03%
epoch:  50
current training loss: 0.0322738
testing acc: 82.75%
batch_size = 100
epochs = 5
model.fit(x_train, y_train,
          batch_size=batch_size, epochs=epochs,
          verbose=1, validation_data=(x_test, y_test))
Train on 60000 samples, validate on 10000 samples
Epoch 1/5 60000/60000 [==============================] - 3s - loss: 0.9858 - acc: 0.6886 - val_loss: 0.7254 - val_acc: 0.7558
Epoch 2/5 60000/60000 [==============================] - 2s - loss: 0.6448 - acc: 0.7887 - val_loss: 0.6206 - val_acc: 0.7894
Epoch 3/5 60000/60000 [==============================] - 2s - loss: 0.5692 - acc: 0.8121 - val_loss: 0.5711 - val_acc: 0.8039
Epoch 4/5 60000/60000 [==============================] - 2s - loss: 0.5301 - acc: 0.8218 - val_loss: 0.5409 - val_acc: 0.8159
Epoch 5/5 60000/60000 [==============================] - 4s - loss: 0.5045 - acc: 0.8291 - val_loss: 0.5238 - val_acc: 0.8177
<keras.callbacks.History at 0x7fdad03cf2b0>
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: 0.523831369305
Test accuracy: 0.8177
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
from PIL import Image
import numpy as np
Image.fromarray(np.uint8(x_train[0].reshape(28, 28)))
x_train.shape
(60000, 28, 28)
y_train.shape
(60000,)
y_train[:10]
array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
f, ax = plt.subplots(figsize=(13, 10))
sns.heatmap(x_train[0], annot=True, fmt="d", linewidths=.2, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f915b4e4eb8>
fig
x_train = x_train.reshape(60000, 28*28)
x_test = x_test.reshape(10000, 28*28)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
60000 train samples
10000 test samples
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)
y_train[0]
array([ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
Image.fromarray(np.uint8(x_train[0].reshape(28, 28)))
labels = {0: "T-shirt/top", 1: "Trouser", 2: "Pullover",
          3: "Dress", 4: "Coat", 5: "Sandal", 6: "Shirt",
          7: "Sneaker", 8: "Bag", 9: "Ankle boot"}
labels[y_train[0]]
'Ankle boot'
f, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(x_train[0], annot=True, fmt="d", linewidths=.2, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f920a180be0>
fig
#model.predict_classes
model.predict(x_train[:1])
array([[ 7.46949183e-07, 1.09039877e-06, 4.68131293e-06,
2.13446970e-06, 9.16834506e-06, 1.93485785e-02,
7.70871975e-06, 1.64444000e-02, 1.35613128e-03,
9.62825418e-01]], dtype=float32)
import pandas as pd  # pandas isn't imported anywhere earlier in this excerpt
pd.DataFrame(model.predict(x_train[:1])).transpose().plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7fda62132ba8>
np.argmax(model.predict(x_test[:20]), axis=1)
array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 5, 3, 4, 1, 2, 4, 8, 0])
np.argmax(y_test[:20], axis=1)
array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0])
error_arr = np.argmax(y_test[:100], axis=1) != np.argmax(model.predict(x_test[:100]), axis=1)
error_arr[:20]
array([False, False, False, False, False, False, False, False, False,
False, False, False, True, False, False, False, False, False,
False, False], dtype=bool)
[i for i, bo in enumerate(error_arr) if bo][:10]
[12, 20, 21, 23, 25, 27, 29, 42, 43, 45]
pd.DataFrame(model.predict(x_test[12:13]).reshape(-1,), index=list(labels.values())).plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7fda61ea5518>
display(labels[np.argmax(y_test[12:13])])
display(Image.fromarray(np.uint8(x_test[12].reshape(28, 28)*255)))
'Sneaker'
fig
pd.DataFrame(model.predict(x_test[20:21]).reshape(-1,), index=list(labels.values())).plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7fda60461668>
display(labels[np.argmax(y_test[20:21])])
display(Image.fromarray(np.uint8(x_test[20].reshape(28, 28)*255)))
'Pullover'
fig
model.compile(loss='categorical_crossentropy',
optimizer='sgd',
metrics=['accuracy'])
What does model.compile do?

keras/models.py:

from .engine.training import Model
keras/engine/training.py:

from .. import losses
class Model(Container):
    def compile(self, optimizer, loss, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        loss = loss or {}
        self.optimizer = optimizers.get(optimizer)
        self.sample_weight_mode = sample_weight_mode
        self.loss = loss
        self.loss_weights = loss_weights
        # Prepare loss functions.
        if isinstance(loss, dict):
            for name in loss:
                if name not in self.output_names:
                    pass  # (error handling elided)
            loss_functions = []
            for name in self.output_names:
                if name not in loss:
                    pass  # (warning elided)
                loss_functions.append(losses.get(loss.get(name)))
        elif isinstance(loss, list):
            if len(loss) != len(self.outputs):
                pass  # (error handling elided)
            loss_functions = [losses.get(l) for l in loss]
        else:
            loss_function = losses.get(loss)
            loss_functions = [loss_function for _ in range(len(self.outputs))]
        self.loss_functions = loss_functions
keras.losses.get??
def get(identifier):
    if identifier is None:
        return None
    if isinstance(identifier, six.string_types):
        identifier = str(identifier)
        return deserialize(identifier)
    elif callable(identifier):
        return identifier
    else:
        raise ValueError('Could not interpret '
                         'loss function identifier:', identifier)
keras.losses.get('categorical_crossentropy')
<function keras.losses.categorical_crossentropy>
keras.losses.categorical_crossentropy??
def categorical_crossentropy(y_true, y_pred):
    return K.categorical_crossentropy(y_true, y_pred)
keras.losses.get('mse')
<function keras.losses.mean_squared_error>
keras.losses.mean_squared_error??
def mean_squared_error(y_true, y_pred):
    return K.mean(K.square(y_pred - y_true), axis=-1)
keras/losses.py:

from . import backend as K
keras/backend/__init__.py:

# Set backend based on KERAS_BACKEND flag, if applicable.
if 'KERAS_BACKEND' in os.environ:
    _backend = os.environ['KERAS_BACKEND']
    assert _backend in {'theano', 'tensorflow', 'cntk'}
    _BACKEND = _backend

# Import backend functions.
if _BACKEND == 'cntk':
    sys.stderr.write('Using CNTK backend\n')
    from .cntk_backend import *
elif _BACKEND == 'theano':
    sys.stderr.write('Using Theano backend.\n')
    from .theano_backend import *
elif _BACKEND == 'tensorflow':
    sys.stderr.write('Using TensorFlow backend.\n')
    from .tensorflow_backend import *
else:
    raise ValueError('Unknown backend: ' + str(_BACKEND))
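In other words, the backend is picked purely from an environment variable at import time; a quick sketch:
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'  # or 'theano' / 'cntk'
import keras  # prints "Using TensorFlow backend." on first import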
dir(keras.backend.tensorflow_backend)
['Function', '_GRAPH_LEARNING_PHASES', '_GRAPH_UID_DICTS', '_MANUAL_VAR_INIT', '_SESSION', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_convert_string_dtype', '_initialize_variables', '_postprocess_conv2d_output', '_postprocess_conv3d_output', '_preprocess_conv2d_input', '_preprocess_conv2d_kernel', '_preprocess_conv3d_input', '_preprocess_conv3d_kernel', '_preprocess_deconv3d_output_shape', '_preprocess_deconv_output_shape', '_preprocess_padding', '_to_tensor', 'abs', 'all', 'any', 'arange', 'argmax', 'argmin', 'batch_dot', 'batch_flatten', 'batch_get_value', 'batch_normalization', 'batch_set_value', 'bias_add', 'binary_crossentropy', 'cast', 'categorical_crossentropy', 'clear_session', 'clip', 'concatenate', 'constant', 'control_flow_ops', 'conv1d', 'conv2d', 'conv2d_transpose', 'conv3d', 'conv3d_transpose', 'cos', 'count_params', 'ctc', 'ctc_batch_cost', 'ctc_decode', 'ctc_label_dense_to_sparse', 'cumprod', 'cumsum', 'defaultdict', 'depthwise_conv2d', 'dot', 'dropout', 'dtype', 'elu', 'epsilon', 'equal', 'eval', 'exp', 'expand_dims', 'eye', 'flatten', 'floatx', 'foldl', 'foldr', 'function', 'functional_ops', 'gather', 'get_session', 'get_uid', 'get_value', 'get_variable_shape', 'gradients', 'greater', 'greater_equal', 'hard_sigmoid', 'has_arg', 'identity', 'image_data_format', 'image_dim_ordering', 'in_test_phase', 'in_top_k', 'in_train_phase', 'int_shape', 'is_keras_tensor', 'is_placeholder', 'is_sparse', 'l2_normalize', 'learning_phase', 'less', 'less_equal', 'local_conv1d', 'local_conv2d', 'log', 'logsumexp', 'manual_variable_initialization', 'map_fn', 'max', 'maximum', 'mean', 'min', 'minimum', 'moving_average_update', 'moving_averages', 'name_scope', 'ndim', 'normalize_batch_in_training', 'not_equal', 'np', 'one_hot', 'ones', 'ones_like', 'os', 'permute_dimensions', 'placeholder', 'pool2d', 'pool3d', 'pow', 'print_tensor', 'prod', 'py_all', 'py_sum', 'random_binomial', 'random_normal', 'random_normal_variable', 'random_uniform', 'random_uniform_variable', 'relu', 'repeat', 'repeat_elements', 'reset_uids', 'reshape', 'resize_images', 'resize_volumes', 'reverse', 'rnn', 'round', 'separable_conv2d', 'set_image_dim_ordering', 'set_learning_phase', 'set_session', 'set_value', 'shape', 'sigmoid', 'sign', 'sin', 'softmax', 'softplus', 'softsign', 'sparse_categorical_crossentropy', 'spatial_2d_padding', 'spatial_3d_padding', 'sqrt', 'square', 'squeeze', 'stack', 'std', 'stop_gradient', 'sum', 'switch', 'tanh', 'temporal_padding', 'tensor_array_ops', 'tf', 'tf_variables', 'tile', 'to_dense', 'transpose', 'truncated_normal', 'update', 'update_add', 'update_sub', 'var', 'variable', 'zeros', 'zeros_like']
def mean_squared_error(y_true, y_pred):
    return K.mean(K.square(y_pred - y_true), axis=-1)

def categorical_crossentropy(y_true, y_pred):
    return K.categorical_crossentropy(y_true, y_pred)

# Version 1: raw TF ops, no clipping (can produce NaN when y_pred has zeros).
def custom_crossentropy(y_true, y_pred):
    return -tf.reduce_sum(y_true * keras.backend.log(y_pred),
                          reduction_indices=len(y_pred.get_shape()) - 1)

# Version 2: the same thing written with backend ops only.
def custom_crossentropy(y_true, y_pred):
    return -keras.backend.sum(y_true * keras.backend.log(y_pred),
                              axis=len(y_pred.get_shape()) - 1)

# Version 3: renormalize and clip, like Keras's TF backend does, in raw TF ops.
def custom_crossentropy(y_true, y_pred):
    y_pred /= tf.reduce_sum(y_pred, axis=len(y_pred.get_shape()) - 1, keep_dims=True)
    _epsilon = keras.backend.tensorflow_backend._to_tensor(keras.backend.epsilon(),
                                                           y_pred.dtype.base_dtype)
    y_pred = tf.clip_by_value(y_pred, _epsilon, 1. - _epsilon)
    return -tf.reduce_sum(y_true * keras.backend.log(y_pred),
                          reduction_indices=len(y_pred.get_shape()) - 1)

# Version 4: the backend-only equivalent of version 3.
def custom_crossentropy(y_true, y_pred):
    y_pred /= keras.backend.sum(y_pred, axis=len(y_pred.get_shape()) - 1, keepdims=True)
    _epsilon = keras.backend.epsilon()
    y_pred = keras.backend.clip(y_pred, _epsilon, 1. - _epsilon)
    return -keras.backend.sum(y_true * keras.backend.log(y_pred),
                              axis=len(y_pred.get_shape()) - 1)
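A quick sanity check (my sketch, with made-up sample tensors): the final custom_crossentropy should agree with the built-in loss on the same inputs.
# Both lines should print roughly 0.3567 (= -log 0.7) for this toy example.
y_true = keras.backend.constant([[0., 1., 0.]])
y_pred = keras.backend.constant([[0.2, 0.7, 0.1]])
print(keras.backend.eval(custom_crossentropy(y_true, y_pred)))
print(keras.backend.eval(keras.losses.categorical_crossentropy(y_true, y_pred)))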
from keras.layers import Dropout  # used below; not imported earlier in this excerpt

layers = [Dense(256, activation='relu', name='fc0', input_shape=(28*28,)),
          Dropout(0.5, name='dropout0'),
          Dense(128, activation='relu', name='fc1'),
          Dropout(0.5, name='dropout2'),
          Dense(10, activation='softmax', name='predict_layer')]
model = Sequential(layers)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
fc0 (Dense)                  (None, 256)               200960
_________________________________________________________________
dropout0 (Dropout)           (None, 256)               0
_________________________________________________________________
fc1 (Dense)                  (None, 128)               32896
_________________________________________________________________
dropout2 (Dropout)           (None, 128)               0
_________________________________________________________________
predict_layer (Dense)        (None, 10)                1290
=================================================================
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________
from keras.optimizers import SGD, Adam, RMSprop
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.99)
model.compile(loss=custom_crossentropy,
              optimizer=adam,
              metrics=['accuracy'])
epochs = 10
batch_size = 256
history = model.fit(x_train, y_train, batch_size=batch_size,
                    epochs=epochs, verbose=1,
                    validation_data=(x_test, y_test))
Train on 60000 samples, validate on 10000 samples
Epoch 1/10 60000/60000 [==============================] - 2s - loss: 0.8170 - acc: 0.7111 - val_loss: 0.4794 - val_acc: 0.8290
Epoch 2/10 60000/60000 [==============================] - 2s - loss: 0.5217 - acc: 0.8177 - val_loss: 0.4385 - val_acc: 0.8424
Epoch 3/10 60000/60000 [==============================] - 2s - loss: 0.4732 - acc: 0.8308 - val_loss: 0.4065 - val_acc: 0.8495
Epoch 4/10 60000/60000 [==============================] - 2s - loss: 0.4382 - acc: 0.8431 - val_loss: 0.3876 - val_acc: 0.8607
Epoch 5/10 60000/60000 [==============================] - 2s - loss: 0.4177 - acc: 0.8506 - val_loss: 0.3860 - val_acc: 0.8587
Epoch 6/10 60000/60000 [==============================] - 2s - loss: 0.4031 - acc: 0.8560 - val_loss: 0.3661 - val_acc: 0.8661
Epoch 7/10 60000/60000 [==============================] - 2s - loss: 0.3923 - acc: 0.8578 - val_loss: 0.3679 - val_acc: 0.8676
Epoch 8/10 60000/60000 [==============================] - 2s - loss: 0.3818 - acc: 0.8627 - val_loss: 0.3602 - val_acc: 0.8696
Epoch 9/10 60000/60000 [==============================] - 2s - loss: 0.3733 - acc: 0.8654 - val_loss: 0.3513 - val_acc: 0.8742
Epoch 10/10 60000/60000 [==============================] - 2s - loss: 0.3666 - acc: 0.8670 - val_loss: 0.3491 - val_acc: 0.8722

act = 'sigmoid'
layer_init = 'glorot_uniform'
encoding_dim = 2
input_ = Input(name='input_0', shape=(28*28,))
encoded = Dense(128, activation=act, kernel_initializer=layer_init, name='encode_1')(input_)
encoded = Dense(64, activation=act, kernel_initializer=layer_init, name='encode_2')(encoded)
encoded = Dense(10, activation=act, kernel_initializer=layer_init, name='encode_3')(encoded)
encoded = Dense(encoding_dim, activation='sigmoid', kernel_initializer=layer_init, name='encoder')(encoded)
decoded = Dense(10, activation=act, kernel_initializer=layer_init, name='decode_1')(encoded)
decoded = Dense(64, activation=act, kernel_initializer=layer_init, name='decode_2')(decoded)
decoded = Dense(128, activation=act, kernel_initializer=layer_init, name='decode_3')(decoded)
decoded = Dense(784, activation='sigmoid', kernel_initializer=layer_init, name='decode_4')(decoded)
autoencoder = Model(inputs=[input_], outputs=[decoded])
autoencoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_0 (InputLayer)         (None, 784)               0
_________________________________________________________________
encode_1 (Dense)             (None, 128)               100480
_________________________________________________________________
encode_2 (Dense)             (None, 64)                8256
_________________________________________________________________
encode_3 (Dense)             (None, 10)                650
_________________________________________________________________
encoder (Dense)              (None, 2)                 22
_________________________________________________________________
decode_1 (Dense)             (None, 10)                30
_________________________________________________________________
decode_2 (Dense)             (None, 64)                704
_________________________________________________________________
decode_3 (Dense)             (None, 128)               8320
_________________________________________________________________
decode_4 (Dense)             (None, 784)               101136
=================================================================
Total params: 219,598
Trainable params: 219,598
Non-trainable params: 0
_________________________________________________________________
def tf_sigmoid_cross_entropy(target, output):
    # Keras layers emit probabilities, but the stable TF loss wants the
    # pre-sigmoid logits, so clip and invert the sigmoid first.
    _epsilon = keras.backend.tensorflow_backend._to_tensor(keras.backend.epsilon(),
                                                           output.dtype.base_dtype)
    output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
    output = tf.log(output / (1 - output))  # logit(p)
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
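As far as I can tell this mirrors what Keras's own TF backend does for binary_crossentropy when from_logits=False, which suggests an easy equivalence check (my sketch, with made-up tensors):
t = keras.backend.constant([[0., 1.]])
p = keras.backend.constant([[0.3, 0.8]])
print(keras.backend.eval(tf_sigmoid_cross_entropy(t, p)))
print(keras.backend.eval(keras.backend.binary_crossentropy(t, p)))  # should match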
import os

def check_folder(log_path, del_mode):
    # In del_mode, wipe any old event files so TensorBoard shows a clean run.
    if del_mode:
        if os.path.exists(log_path):
            for f in os.listdir(log_path):
                os.remove(os.path.join(log_path, f))
        return log_path
    else:
        return log_path + '0'
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=check_folder('logs/glorot_uniform_init', 1),
                                                   histogram_freq=1,
                                                   write_graph=True, write_grads=True)
autoencoder.compile(loss=tf_sigmoid_cross_entropy,
                    optimizer=Adam(lr=0.001))


encoder = Model(inputs=[input_], outputs=[encoded])
encoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_0 (InputLayer)         (None, 784)               0
_________________________________________________________________
encode_1 (Dense)             (None, 128)               100480
_________________________________________________________________
encode_2 (Dense)             (None, 64)                8256
_________________________________________________________________
encode_3 (Dense)             (None, 10)                650
_________________________________________________________________
encoder (Dense)              (None, 2)                 22
=================================================================
Total params: 109,408
Trainable params: 109,408
Non-trainable params: 0
_________________________________________________________________
encoder.predict(x_test[:5])
array([[ 0.68607628, 0.66867411],
[ 0.6878587 , 0.6734581 ],
[ 0.69035929, 0.67316854],
[ 0.68854773, 0.67028314],
[ 0.68765885, 0.67222691]], dtype=float32)
f
fig
autoencoder.fit(x_train, x_train,
                epochs=7, batch_size=128,
                shuffle=True, verbose=1,
                validation_data=(x_test, x_test),
                #callbacks=[tensorboard_callback]
                )
Train on 60000 samples, validate on 10000 samples
Epoch 1/7 60000/60000 [==============================] - 9s - loss: 0.4951 - val_loss: 0.4833
Epoch 2/7 60000/60000 [==============================] - 9s - loss: 0.4366 - val_loss: 0.4236
Epoch 3/7 60000/60000 [==============================] - 9s - loss: 0.4157 - val_loss: 0.4098
Epoch 4/7 60000/60000 [==============================] - 10s - loss: 0.4025 - val_loss: 0.3971
Epoch 5/7 60000/60000 [==============================] - 9s - loss: 0.3914 - val_loss: 0.3883
Epoch 6/7 60000/60000 [==============================] - 9s - loss: 0.3846 - val_loss: 0.3843
Epoch 7/7 60000/60000 [==============================] - 9s - loss: 0.3802 - val_loss: 0.3805
<keras.callbacks.History at 0x7fda50216b70>
fig
http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
That link is a detailed paper on the initializer Keras uses by default for its Dense layers; we won't walk through it here. It explains why the defaults work well for most ordinary classification tasks, or with ReLU activations, to the point that some people half-joke that an out-of-the-box Keras model trains better than a hand-rolled TF one. It's not that Keras users are stronger... it's that Keras has distilled a lot of research experience and pre-tuned these details. Chollet is seriously impressive; read the Keras source code and you'll find comments noting which paper each function implements.
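For reference, glorot_uniform draws weights from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)); a NumPy sketch of the idea (not Keras's actual code path):
def glorot_uniform_sketch(fan_in, fan_out):
    # Glorot & Bengio (2010): choose the range so activation and gradient
    # variances stay roughly constant from layer to layer.
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_in, fan_out))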
from keras.initializers import glorot_uniform, glorot_normal, RandomNormal, RandomUniform, TruncatedNormal
layer_init = TruncatedNormal(mean=.0, stddev=1.)
act = 'sigmoid'
encoding_dim = 2
input_ = Input(name='input_0', shape=(28*28,))
encoded = Dense(128, activation=act, kernel_initializer=layer_init, name='encode_1')(input_)
encoded = Dense(64, activation=act, kernel_initializer=layer_init, name='encode_2')(encoded)
encoded = Dense(10, activation=act, kernel_initializer=layer_init, name='encode_3')(encoded)
encoded = Dense(encoding_dim, activation='sigmoid', kernel_initializer=layer_init, name='encoder')(encoded)
decoded = Dense(10, activation=act, kernel_initializer=layer_init, name='decode_1')(encoded)
decoded = Dense(64, activation=act, kernel_initializer=layer_init, name='decode_2')(decoded)
decoded = Dense(128, activation=act, kernel_initializer=layer_init, name='decode_3')(decoded)
decoded = Dense(784, activation='sigmoid', kernel_initializer=layer_init, name='decode_4')(decoded)
autoencoder = Model(inputs=[input_], outputs=[decoded])
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=check_folder('logs/truncated_normal_init', 1),
                                                   histogram_freq=1,
                                                   write_graph=True, write_grads=True)
autoencoder.compile(loss=tf_sigmoid_cross_entropy,
                    optimizer=Adam(lr=0.001))
fig
autoencoder.fit(x_train, x_train,
                epochs=6, batch_size=128,
                shuffle=True, verbose=1,
                validation_data=(x_test, x_test),
                #callbacks=[tensorboard_callback]
                )
Train on 60000 samples, validate on 10000 samples
Epoch 1/6 60000/60000 [==============================] - 9s - loss: 0.4430 - val_loss: 0.3982
Epoch 2/6 60000/60000 [==============================] - 9s - loss: 0.3863 - val_loss: 0.3833
Epoch 3/6 60000/60000 [==============================] - 9s - loss: 0.3788 - val_loss: 0.3784
Epoch 4/6 60000/60000 [==============================] - 9s - loss: 0.3737 - val_loss: 0.3732
Epoch 5/6 60000/60000 [==============================] - 9s - loss: 0.3689 - val_loss: 0.3686
Epoch 6/6 60000/60000 [==============================] - 9s - loss: 0.3645 - val_loss: 0.3644
<keras.callbacks.History at 0x7fda2ecceef0>
fig
layer_init = glorot_normal()
act = 'sigmoid'
encoding_dim = 2
input_ = Input(name='input_0', shape=(28*28,))
encoded = Dense(128, activation=act, kernel_initializer=layer_init, name='encode_1')(input_)
encoded = Dense(64, activation=act, kernel_initializer=layer_init, name='encode_2')(encoded)
encoded = Dense(10, activation=act, kernel_initializer=layer_init, name='encode_3')(encoded)
encoded = Dense(encoding_dim, activation='sigmoid', kernel_initializer=layer_init, name='encoder')(encoded)
decoded = Dense(10, activation=act, kernel_initializer=layer_init, name='decode_1')(encoded)
decoded = Dense(64, activation=act, kernel_initializer=layer_init, name='decode_2')(decoded)
decoded = Dense(128, activation=act, kernel_initializer=layer_init, name='decode_3')(decoded)
decoded = Dense(784, activation='sigmoid', kernel_initializer=layer_init, name='decode_4')(decoded)
autoencoder = Model(inputs=[input_], outputs=[decoded])
autoencoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_0 (InputLayer)         (None, 784)               0
_________________________________________________________________
encode_1 (Dense)             (None, 128)               100480
_________________________________________________________________
encode_2 (Dense)             (None, 64)                8256
_________________________________________________________________
encode_3 (Dense)             (None, 10)                650
_________________________________________________________________
encoder (Dense)              (None, 2)                 22
_________________________________________________________________
decode_1 (Dense)             (None, 10)                30
_________________________________________________________________
decode_2 (Dense)             (None, 64)                704
_________________________________________________________________
decode_3 (Dense)             (None, 128)               8320
_________________________________________________________________
decode_4 (Dense)             (None, 784)               101136
=================================================================
Total params: 219,598
Trainable params: 219,598
Non-trainable params: 0
_________________________________________________________________
fig
layer_init = RandomUniform(minval=-1, maxval=1)
act = 'sigmoid'
encoding_dim = 2
input_ = Input(name='input_0', shape=(28*28,))
encoded = Dense(128, activation=act, kernel_initializer=layer_init, name='encode_1')(input_)
encoded = Dense(64, activation=act, kernel_initializer=layer_init, name='encode_2')(encoded)
encoded = Dense(10, activation=act, kernel_initializer=layer_init, name='encode_3')(encoded)
encoded = Dense(encoding_dim, activation='sigmoid', kernel_initializer=layer_init, name='encoder')(encoded)
decoded = Dense(10, activation=act, kernel_initializer=layer_init, name='decode_1')(encoded)
decoded = Dense(64, activation=act, kernel_initializer=layer_init, name='decode_2')(decoded)
decoded = Dense(128, activation=act, kernel_initializer=layer_init, name='decode_3')(decoded)
decoded = Dense(784, activation='sigmoid', kernel_initializer=layer_init, name='decode_4')(decoded)
autoencoder = Model(inputs=[input_], outputs=[decoded])
autoencoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_0 (InputLayer)         (None, 784)               0
_________________________________________________________________
encode_1 (Dense)             (None, 128)               100480
_________________________________________________________________
encode_2 (Dense)             (None, 64)                8256
_________________________________________________________________
encode_3 (Dense)             (None, 10)                650
_________________________________________________________________
encoder (Dense)              (None, 2)                 22
_________________________________________________________________
decode_1 (Dense)             (None, 10)                30
_________________________________________________________________
decode_2 (Dense)             (None, 64)                704
_________________________________________________________________
decode_3 (Dense)             (None, 128)               8320
_________________________________________________________________
decode_4 (Dense)             (None, 784)               101136
=================================================================
Total params: 219,598
Trainable params: 219,598
Non-trainable params: 0
_________________________________________________________________
autoencoder.compile(loss=tf_sigmoid_cross_entropy,
                    optimizer=Adam(lr=0.001))
fig
autoencoder.fit(x_train, x_train,
                epochs=6, batch_size=128,
                shuffle=True, verbose=1,
                validation_data=(x_test, x_test),
                #callbacks=[tensorboard_callback]
                )
Train on 60000 samples, validate on 10000 samples
Epoch 1/6 60000/60000 [==============================] - 5s - loss: 0.5530 - val_loss: 0.4663
Epoch 2/6 60000/60000 [==============================] - 5s - loss: 0.4361 - val_loss: 0.4285
Epoch 3/6 60000/60000 [==============================] - 5s - loss: 0.4260 - val_loss: 0.4253
Epoch 4/6 60000/60000 [==============================] - 5s - loss: 0.4206 - val_loss: 0.4124
Epoch 5/6 60000/60000 [==============================] - 5s - loss: 0.3845 - val_loss: 0.3733
Epoch 6/6 60000/60000 [==============================] - 5s - loss: 0.3674 - val_loss: 0.3651
<keras.callbacks.History at 0x7f31c0cf4e48>
fig
layer_init = RandomNormal(mean=.0, stddev=1.)
act = 'sigmoid'
encoding_dim = 2
input_ = Input(name='input_0', shape=(28*28,))
encoded = Dense(128, activation=act, kernel_initializer=layer_init, name='encode_1')(input_)
encoded = Dense(64, activation=act, kernel_initializer=layer_init, name='encode_2')(encoded)
encoded = Dense(10, activation=act, kernel_initializer=layer_init, name='encode_3')(encoded)
encoded = Dense(encoding_dim, activation='sigmoid', kernel_initializer=layer_init, name='encoder')(encoded)
decoded = Dense(10, activation=act, kernel_initializer=layer_init, name='decode_1')(encoded)
decoded = Dense(64, activation=act, kernel_initializer=layer_init, name='decode_2')(decoded)
decoded = Dense(128, activation=act, kernel_initializer=layer_init, name='decode_3')(decoded)
decoded = Dense(784, activation='sigmoid', kernel_initializer=layer_init, name='decode_4')(decoded)
autoencoder = Model(inputs=[input_], outputs=[decoded])
autoencoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_0 (InputLayer)         (None, 784)               0
_________________________________________________________________
encode_1 (Dense)             (None, 128)               100480
_________________________________________________________________
encode_2 (Dense)             (None, 64)                8256
_________________________________________________________________
encode_3 (Dense)             (None, 10)                650
_________________________________________________________________
encoder (Dense)              (None, 2)                 22
_________________________________________________________________
decode_1 (Dense)             (None, 10)                30
_________________________________________________________________
decode_2 (Dense)             (None, 64)                704
_________________________________________________________________
decode_3 (Dense)             (None, 128)               8320
_________________________________________________________________
decode_4 (Dense)             (None, 784)               101136
=================================================================
Total params: 219,598
Trainable params: 219,598
Non-trainable params: 0
_________________________________________________________________
autoencoder.compile(loss=tf_sigmoid_cross_entropy,
                    optimizer=Adam(lr=0.001))
fig
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=check_folder('logs/random_normal_init', 1),
                                                   histogram_freq=1,
                                                   write_graph=True, write_grads=True)
autoencoder.fit(x_train, x_train,
                epochs=6, batch_size=128,
                shuffle=True, verbose=1,
                validation_data=(x_test, x_test),
                callbacks=[tensorboard_callback]
                )
Train on 60000 samples, validate on 10000 samples
Epoch 1/6 60000/60000 [==============================] - 10s - loss: 0.7657 - val_loss: 0.4885
Epoch 2/6 60000/60000 [==============================] - 9s - loss: 0.4753 - val_loss: 0.4548
Epoch 3/6 60000/60000 [==============================] - 9s - loss: 0.4116 - val_loss: 0.3869
Epoch 4/6 60000/60000 [==============================] - 9s - loss: 0.3812 - val_loss: 0.3805
Epoch 5/6 60000/60000 [==============================] - 9s - loss: 0.3765 - val_loss: 0.3765
Epoch 6/6 60000/60000 [==============================] - 9s - loss: 0.3720 - val_loss: 0.3715
<keras.callbacks.History at 0x7f76f9ea1f98>
fig
fig, axes = plt.subplots(4, 9, figsize=(16, 8))
decoded_imgs = [Image.fromarray(np.uint8(arr.reshape(28, 28)*255))
                for arr in autoencoder.predict(x_test[:18])]
ori_imgs = [Image.fromarray(np.uint8(x_test[i].reshape(28, 28)*255)) for i in range(18)]
pil_images = decoded_imgs + ori_imgs
for i, ax in enumerate(axes.ravel()):
    im = ax.imshow(np.asarray(pil_images[i]), cmap='gray', interpolation='lanczos')
    if i > 17:
        i -= 18
    ax.set_title(labels[y_test[i]] + ' %d' % y_test[i])
    ax.set_xticks(np.arange(0, 28, 28))
    ax.set_yticks(np.arange(0, 28, 28))
    ax.grid(color='k', linestyle='-', linewidth=0)
fig


(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
labels = {0: "T-shirt/top", 1: "Trouser", 2: "Pullover",
          3: "Dress", 4: "Coat", 5: "Sandal", 6: "Shirt",
          7: "Sneaker", 8: "Bag", 9: "Ankle boot"}
y_train < 5
array([False, True, True, ..., True, True, False], dtype=bool)
x_train_f5 = x_train[y_train < 5]
y_train_f5 = y_train[y_train < 5]
x_test_f5 = x_test[y_test < 5]
y_test_f5 = y_test[y_test < 5]
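num_classes is never defined in this excerpt; judging from the 5-way predict layer in the summaries below, it has to be:
num_classes = 5  # first-5 / last-5 split of the ten Fashion-MNIST classes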
feature_layers = [
    Dense(512, activation='relu', name='dense0_0', input_shape=(784,)),
    Dense(384, activation='relu', name='dense0_1'),
    Dense(256, activation='relu', name='dense0_2'),
    Dense(128, activation='relu', name='dense0_3'),
]
retrain_layer = [
    Dense(56, activation='relu', name='dense1_0'),
    Dropout(0.5, name='dropout0_1'),
    Dense(num_classes, activation='softmax', name='predict_layer'),
]
model = Sequential(feature_layers + retrain_layer)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense0_0 (Dense)             (None, 512)               401920
_________________________________________________________________
dense0_1 (Dense)             (None, 384)               196992
_________________________________________________________________
dense0_2 (Dense)             (None, 256)               98560
_________________________________________________________________
dense0_3 (Dense)             (None, 128)               32896
_________________________________________________________________
dense1_0 (Dense)             (None, 56)                7224
_________________________________________________________________
dropout0_1 (Dropout)         (None, 56)                0
_________________________________________________________________
predict_layer (Dense)        (None, 5)                 285
=================================================================
Total params: 737,877
Trainable params: 737,877
Non-trainable params: 0
_________________________________________________________________
%%time
model.fit(x_train_f5, y_train_f5,
          batch_size=batch_size, epochs=epochs,
          verbose=1,
          validation_data=(x_test_f5, y_test_f5))
score = model.evaluate(x_test_f5, y_test_f5, verbose=0)
print('Test accuracy:', score[1])
Train on 30000 samples, validate on 5000 samples
Epoch 1/5 30000/30000 [==============================] - 3s - loss: 0.5638 - acc: 0.7965 - val_loss: 0.4295 - val_acc: 0.8410
Epoch 2/5 30000/30000 [==============================] - 2s - loss: 0.3723 - acc: 0.8756 - val_loss: 0.3350 - val_acc: 0.8794
Epoch 3/5 30000/30000 [==============================] - 2s - loss: 0.3290 - acc: 0.8871 - val_loss: 0.3126 - val_acc: 0.8858
Epoch 4/5 30000/30000 [==============================] - 2s - loss: 0.2966 - acc: 0.8988 - val_loss: 0.3146 - val_acc: 0.8886
Epoch 5/5 30000/30000 [==============================] - 2s - loss: 0.2821 - acc: 0.9049 - val_loss: 0.3394 - val_acc: 0.8764
Test accuracy: 0.8764
CPU times: user 1min 29s, sys: 8.3 s, total: 1min 37s
Wall time: 15 s
feature_layers[0].trainable
True
for layer in feature_layers:
    layer.trainable = False
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense0_0 (Dense)             (None, 512)               401920
_________________________________________________________________
dense0_1 (Dense)             (None, 384)               196992
_________________________________________________________________
dense0_2 (Dense)             (None, 256)               98560
_________________________________________________________________
dense0_3 (Dense)             (None, 128)               32896
_________________________________________________________________
dense1_0 (Dense)             (None, 56)                7224
_________________________________________________________________
dropout0_1 (Dropout)         (None, 56)                0
_________________________________________________________________
predict_layer (Dense)        (None, 5)                 285
=================================================================
Total params: 737,877
Trainable params: 7,509
Non-trainable params: 730,368
_________________________________________________________________

Note that in Keras the trainable flag is only baked in at the next compile, which is why model.compile is called again before fine-tuning below.
x_train_last5 = x_train[y_train >= 5]
y_train_last5 = y_train[y_train >= 5] - 5
x_test_last5 = x_test[y_test >= 5]
y_test_last5 = y_test[y_test >= 5] - 5
less_data_num = 5000
x_train_last5 = x_train_last5[:less_data_num]
y_train_last5 = y_train_last5[:less_data_num]
model.compile(loss=categorical_crossentropy,
              optimizer=Adam(),
              metrics=['accuracy'])
score = model.evaluate(x_test_last5, y_test_last5, verbose=0)
print('Test accuracy:', score[1])
Test accuracy: 0.1922
correct_arr = np.argmax(y_test_last5[:], axis=1) == np.argmax(model.predict(x_test_last5[:]), axis=1)
correct_ind = [i for i, bo in enumerate(correct_arr) if bo]
correct_ind[:10]
[3, 9, 14, 21, 23, 29, 38, 40, 44, 54]
fig
%%time
epochs = 50
model.fit(x_train_last5, y_train_last5,
          batch_size=batch_size, epochs=epochs,
          verbose=1,
          validation_data=(x_test_last5, y_test_last5))
score = model.evaluate(x_test_last5, y_test_last5, verbose=0)
print('Test accuracy:', score[1])
Train on 5000 samples, validate on 5000 samples
Epoch 1/50 5000/5000 [==============================] - 0s - loss: 3.0787 - acc: 0.1974 - val_loss: 2.2825 - val_acc: 0.2026
Epoch 2/50 5000/5000 [==============================] - 0s - loss: 1.9038 - acc: 0.2004 - val_loss: 1.5540 - val_acc: 0.2564
Epoch 3/50 5000/5000 [==============================] - 0s - loss: 1.5154 - acc: 0.2656 - val_loss: 1.3844 - val_acc: 0.3810
Epoch 4/50 5000/5000 [==============================] - 0s - loss: 1.3940 - acc: 0.3474 - val_loss: 1.3110 - val_acc: 0.4944
Epoch 5/50 5000/5000 [==============================] - 0s - loss: 1.3278 - acc: 0.4026 - val_loss: 1.2495 - val_acc: 0.5390
Epoch 6/50 5000/5000 [==============================] - 0s - loss: 1.2668 - acc: 0.4762 - val_loss: 1.1937 - val_acc: 0.5832
Epoch 7/50 5000/5000 [==============================] - 0s - loss: 1.2255 - acc: 0.5090 - val_loss: 1.1456 - val_acc: 0.6156
Epoch 8/50 5000/5000 [==============================] - 0s - loss: 1.1931 - acc: 0.5380 - val_loss: 1.0987 - val_acc: 0.6378
Epoch 9/50 5000/5000 [==============================] - 0s - loss: 1.1504 - acc: 0.5692 - val_loss: 1.0572 - val_acc: 0.6556
Epoch 10/50 5000/5000 [==============================] - 0s - loss: 1.1080 - acc: 0.5902 - val_loss: 1.0169 - val_acc: 0.6694
Epoch 11/50 5000/5000 [==============================] - 0s - loss: 1.0739 - acc: 0.6068 - val_loss: 0.9792 - val_acc: 0.6776
Epoch 12/50 5000/5000 [==============================] - 0s - loss: 1.0484 - acc: 0.6160 - val_loss: 0.9485 - val_acc: 0.6966
Epoch 13/50 5000/5000 [==============================] - 0s - loss: 1.0256 - acc: 0.6324 - val_loss: 0.9213 - val_acc: 0.7062
Epoch 14/50 5000/5000 [==============================] - 0s - loss: 0.9980 - acc: 0.6430 - val_loss: 0.8940 - val_acc: 0.7156
Epoch 15/50 5000/5000 [==============================] - 0s - loss: 0.9695 - acc: 0.6556 - val_loss: 0.8637 - val_acc: 0.7278
Epoch 16/50 5000/5000 [==============================] - 0s - loss: 0.9509 - acc: 0.6578 - val_loss: 0.8445 - val_acc: 0.7302
Epoch 17/50 5000/5000 [==============================] - 0s - loss: 0.9300 - acc: 0.6724 - val_loss: 0.8189 - val_acc: 0.7410
Epoch 18/50 5000/5000 [==============================] - 0s - loss: 0.8954 - acc: 0.6772 - val_loss: 0.7981 - val_acc: 0.7472
Epoch 19/50 5000/5000 [==============================] - 0s - loss: 0.8730 - acc: 0.6938 - val_loss: 0.7763 - val_acc: 0.7512
Epoch 20/50 5000/5000 [==============================] - 0s - loss: 0.8653 - acc: 0.6974 - val_loss: 0.7604 - val_acc: 0.7592
Epoch 21/50 5000/5000 [==============================] - 0s - loss: 0.8540 - acc: 0.7008 - val_loss: 0.7476 - val_acc: 0.7590
Epoch 22/50 5000/5000 [==============================] - 0s - loss: 0.8407 - acc: 0.7048 - val_loss: 0.7271 - val_acc: 0.7674
Epoch 23/50 5000/5000 [==============================] - 0s - loss: 0.8199 - acc: 0.7150 - val_loss: 0.7165 - val_acc: 0.7730
Epoch 24/50 5000/5000 [==============================] - 0s - loss: 0.8099 - acc: 0.7260 - val_loss: 0.7050 - val_acc: 0.7722
Epoch 25/50 5000/5000 [==============================] - 0s - loss: 0.7902 - acc: 0.7282 - val_loss: 0.6851 - val_acc: 0.7830
Epoch 26/50 5000/5000 [==============================] - 0s - loss: 0.7895 - acc: 0.7222 - val_loss: 0.6726 - val_acc: 0.7850
Epoch 27/50 5000/5000 [==============================] - 0s - loss: 0.7550 - acc: 0.7358 - val_loss: 0.6643 - val_acc: 0.7856
Epoch 28/50 5000/5000 [==============================] - 0s - loss: 0.7570 - acc: 0.7326 - val_loss: 0.6510 - val_acc: 0.7902
Epoch 29/50 5000/5000 [==============================] - 0s - loss: 0.7453 - acc: 0.7406 - val_loss: 0.6399 - val_acc: 0.7930
Epoch 30/50 5000/5000 [==============================] - 0s - loss: 0.7355 - acc: 0.7422 - val_loss: 0.6310 - val_acc: 0.7952
Epoch 31/50 5000/5000 [==============================] - 0s - loss: 0.7158 - acc: 0.7538 - val_loss: 0.6160 - val_acc: 0.8010
Epoch 32/50 5000/5000 [==============================] - 0s - loss: 0.7126 - acc: 0.7546 - val_loss: 0.6095 - val_acc: 0.8036
Epoch 33/50 5000/5000 [==============================] - 0s - loss: 0.7034 - acc: 0.7576 - val_loss: 0.5994 - val_acc: 0.8066
Epoch 34/50 5000/5000 [==============================] - 0s - loss: 0.6982 - acc: 0.7610 - val_loss: 0.5960 - val_acc: 0.8080
Epoch 35/50 5000/5000 [==============================] - 0s - loss: 0.6849 - acc: 0.7688 - val_loss: 0.5850 - val_acc: 0.8102
Epoch 36/50 5000/5000 [==============================] - 0s - loss: 0.6794 - acc: 0.7632 - val_loss: 0.5802 - val_acc: 0.8150
Epoch 37/50 5000/5000 [==============================] - 0s - loss: 0.6804 - acc: 0.7604 - val_loss: 0.5708 - val_acc: 0.8138
Epoch 38/50 5000/5000 [==============================] - 0s - loss: 0.6699 - acc: 0.7756 - val_loss: 0.5651 - val_acc: 0.8148
Epoch 39/50 5000/5000 [==============================] - 0s - loss: 0.6578 - acc: 0.7786 - val_loss: 0.5586 - val_acc: 0.8198
Epoch 40/50 5000/5000 [==============================] - 0s - loss: 0.6631 - acc: 0.7694 - val_loss: 0.5546 - val_acc: 0.8212
Epoch 41/50 5000/5000 [==============================] - 0s - loss: 0.6542 - acc: 0.7804 - val_loss: 0.5457 - val_acc: 0.8204
Epoch 42/50 5000/5000 [==============================] - 0s - loss: 0.6392 - acc: 0.7810 - val_loss: 0.5386 - val_acc: 0.8258
Epoch 43/50 5000/5000 [==============================] - 0s - loss: 0.6337 - acc: 0.7830 - val_loss: 0.5344 - val_acc: 0.8298
Epoch 44/50 5000/5000 [==============================] - 0s - loss: 0.6294 - acc: 0.7834 - val_loss: 0.5299 - val_acc: 0.8278
Epoch 45/50 5000/5000 [==============================] - 0s - loss: 0.6291 - acc: 0.7914 - val_loss: 0.5235 - val_acc: 0.8288
Epoch 46/50 5000/5000 [==============================] - 0s - loss: 0.6143 - acc: 0.7916 - val_loss: 0.5156 - val_acc: 0.8306
Epoch 47/50 5000/5000 [==============================] - 0s - loss: 0.6127 - acc: 0.7958 - val_loss: 0.5144 - val_acc: 0.8346
Epoch 48/50 5000/5000 [==============================] - 0s - loss: 0.6038 - acc: 0.7932 - val_loss: 0.5056 - val_acc: 0.8346
Epoch 49/50 5000/5000 [==============================] - 0s - loss: 0.6048 - acc: 0.7946 - val_loss: 0.5028 - val_acc: 0.8338
Epoch 50/50 5000/5000 [==============================] - 0s - loss: 0.5983 - acc: 0.7968 - val_loss: 0.4993 - val_acc: 0.8354
Test accuracy: 0.8354
CPU times: user 1min 38s, sys: 1.91 s, total: 1min 40s
Wall time: 15.7 s
from keras.layers import Conv2D, MaxPool2D, Flatten  # Flatten is used below too
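The convolution hyper-parameters aren't shown in this excerpt either; the values below are reconstructed from the model summary (28 → 26 → 24 means 3×3 valid convolutions, 24 → 12 means 2×2 pooling, and 4·4·32 = 512 matches the flatten output), so treat them as an educated reconstruction:
filters = 32               # the conv outputs all have 32 channels
kernel_size = (3, 3)       # 28 -> 26 -> 24 under padding='valid'
pool_size = (2, 2)         # 24 -> 12 and 8 -> 4
input_shape = (28, 28, 1)  # channels_last grayscale images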
feature_layers = [
    Conv2D(filters, kernel_size, padding='valid',
           input_shape=input_shape, name='conv1_1'),
    Activation('relu'),
    Conv2D(filters, kernel_size, padding='valid', name='conv1_2'),
    Activation('relu'),
    MaxPool2D(pool_size=pool_size, name='maxpool1'),
    Dropout(0.25, name='dropout_0'),
    Conv2D(filters, kernel_size, padding='valid',
           input_shape=input_shape, name='conv2_1'),
    Activation('relu'),
    Conv2D(filters, kernel_size, padding='valid', name='conv2_2'),
    Activation('relu'),
    MaxPool2D(pool_size=pool_size, name='maxpool2'),
    Dropout(0.25, name='dropout_1'),
    Flatten(),
]
retrain_layer = [
    Dense(56, activation='relu', name='dense1'),
    Dropout(0.5, name='dropout_2'),
    Dense(num_classes, activation='softmax', name='predict_layer'),
]
model = Sequential(feature_layers + retrain_layer)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv1_1 (Conv2D)             (None, 26, 26, 32)        320
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0
_________________________________________________________________
conv1_2 (Conv2D)             (None, 24, 24, 32)        9248
_________________________________________________________________
activation_2 (Activation)    (None, 24, 24, 32)        0
_________________________________________________________________
maxpool1 (MaxPooling2D)      (None, 12, 12, 32)        0
_________________________________________________________________
dropout_0 (Dropout)          (None, 12, 12, 32)        0
_________________________________________________________________
conv2_1 (Conv2D)             (None, 10, 10, 32)        9248
_________________________________________________________________
activation_3 (Activation)    (None, 10, 10, 32)        0
_________________________________________________________________
conv2_2 (Conv2D)             (None, 8, 8, 32)          9248
_________________________________________________________________
activation_4 (Activation)    (None, 8, 8, 32)          0
_________________________________________________________________
maxpool2 (MaxPooling2D)      (None, 4, 4, 32)          0
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 4, 32)          0
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0
_________________________________________________________________
dense1 (Dense)               (None, 56)                28728
_________________________________________________________________
dropout_2 (Dropout)          (None, 56)                0
_________________________________________________________________
predict_layer (Dense)        (None, 5)                 285
=================================================================
Total params: 57,077
Trainable params: 57,077
Non-trainable params: 0
_________________________________________________________________
%%time
model.fit(x_train_f5, y_train_f5,
          batch_size=batch_size, epochs=epochs,
          verbose=1,
          validation_data=(x_test_f5, y_test_f5))
score = model.evaluate(x_test_f5, y_test_f5, verbose=0)
print('Test accuracy:', score[1])
Train on 30000 samples, validate on 5000 samples
Epoch 1/10 30000/30000 [==============================] - 32s - loss: 0.7580 - acc: 0.6961 - val_loss: 0.4354 - val_acc: 0.8274
Epoch 2/10 30000/30000 [==============================] - 31s - loss: 0.4377 - acc: 0.8359 - val_loss: 0.3570 - val_acc: 0.8610
Epoch 3/10 30000/30000 [==============================] - 31s - loss: 0.3652 - acc: 0.8705 - val_loss: 0.2825 - val_acc: 0.8996
Epoch 4/10 30000/30000 [==============================] - 31s - loss: 0.3184 - acc: 0.8892 - val_loss: 0.2462 - val_acc: 0.9140
Epoch 5/10 30000/30000 [==============================] - 31s - loss: 0.2880 - acc: 0.9009 - val_loss: 0.2308 - val_acc: 0.9168
Epoch 6/10 30000/30000 [==============================] - 31s - loss: 0.2672 - acc: 0.9070 - val_loss: 0.2213 - val_acc: 0.9240
Epoch 7/10 30000/30000 [==============================] - 31s - loss: 0.2511 - acc: 0.9134 - val_loss: 0.2139 - val_acc: 0.9204
Epoch 8/10 30000/30000 [==============================] - 31s - loss: 0.2455 - acc: 0.9164 - val_loss: 0.1986 - val_acc: 0.9314
Epoch 9/10 30000/30000 [==============================] - 31s - loss: 0.2319 - acc: 0.9197 - val_loss: 0.1959 - val_acc: 0.9338
Epoch 10/10 30000/30000 [==============================] - 31s - loss: 0.2312 - acc: 0.9213 - val_loss: 0.1960 - val_acc: 0.9310
Test accuracy: 0.931
CPU times: user 49min 11s, sys: 5min 22s, total: 54min 34s
Wall time: 5min 20s
for layer in feature_layers:
    layer.trainable = False
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv1_1 (Conv2D)             (None, 26, 26, 32)        320
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0
_________________________________________________________________
conv1_2 (Conv2D)             (None, 24, 24, 32)        9248
_________________________________________________________________
activation_2 (Activation)    (None, 24, 24, 32)        0
_________________________________________________________________
maxpool1 (MaxPooling2D)      (None, 12, 12, 32)        0
_________________________________________________________________
dropout_0 (Dropout)          (None, 12, 12, 32)        0
_________________________________________________________________
conv2_1 (Conv2D)             (None, 10, 10, 32)        9248
_________________________________________________________________
activation_3 (Activation)    (None, 10, 10, 32)        0
_________________________________________________________________
conv2_2 (Conv2D)             (None, 8, 8, 32)          9248
_________________________________________________________________
activation_4 (Activation)    (None, 8, 8, 32)          0
_________________________________________________________________
maxpool2 (MaxPooling2D)      (None, 4, 4, 32)          0
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 4, 32)          0
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0
_________________________________________________________________
dense1 (Dense)               (None, 56)                28728
_________________________________________________________________
dropout_2 (Dropout)          (None, 56)                0
_________________________________________________________________
predict_layer (Dense)        (None, 5)                 285
=================================================================
Total params: 57,077
Trainable params: 29,013
Non-trainable params: 28,064
_________________________________________________________________
x_train_last5 = x_train[y_train >= 5]
y_train_last5 = y_train[y_train >= 5] - 5
x_test_last5 = x_test[y_test >= 5]
y_test_last5 = y_test[y_test >= 5] - 5
less_data_num = 1500
x_train_last5 = x_train_last5.reshape((-1,)+ input_shape)[:less_data_num]
x_test_last5 = x_test_last5.reshape((-1,)+ input_shape)
x_train_last5 = x_train_last5.astype('float32')
x_test_last5 = x_test_last5.astype('float32')
x_train_last5 /= 255
x_test_last5 /= 255
print(x_train_last5.shape[0], 'train samples')
print(x_test_last5.shape[0], 'test samples')
y_train_last5 = keras.utils.to_categorical(y_train_last5, num_classes)[:less_data_num]
y_test_last5 = keras.utils.to_categorical(y_test_last5, num_classes)
1500 train samples
5000 test samples
%%time
epochs = 20
model.fit(x_train_last5, y_train_last5,
          batch_size=batch_size, epochs=epochs,
          verbose=1,
          validation_data=(x_test_last5, y_test_last5))
score = model.evaluate(x_test_last5, y_test_last5, verbose=0)
print('Test accuracy:', score[1])
Train on 1500 samples, validate on 5000 samples
Epoch 1/20 1500/1500 [==============================] - 2s - loss: 3.8063 - acc: 0.1380 - val_loss: 2.9058 - val_acc: 0.1720
Epoch 2/20 1500/1500 [==============================] - 2s - loss: 2.7293 - acc: 0.1960 - val_loss: 2.1850 - val_acc: 0.3024
Epoch 3/20 1500/1500 [==============================] - 2s - loss: 2.0530 - acc: 0.2693 - val_loss: 1.7061 - val_acc: 0.3642
Epoch 4/20 1500/1500 [==============================] - 2s - loss: 1.6266 - acc: 0.3460 - val_loss: 1.3796 - val_acc: 0.4798
Epoch 5/20 1500/1500 [==============================] - 2s - loss: 1.3719 - acc: 0.4093 - val_loss: 1.1600 - val_acc: 0.6048
Epoch 6/20 1500/1500 [==============================] - 2s - loss: 1.1822 - acc: 0.4940 - val_loss: 1.0057 - val_acc: 0.6928
Epoch 7/20 1500/1500 [==============================] - 2s - loss: 1.0615 - acc: 0.5680 - val_loss: 0.8780 - val_acc: 0.7428
Epoch 8/20 1500/1500 [==============================] - 2s - loss: 0.9800 - acc: 0.6073 - val_loss: 0.7637 - val_acc: 0.7728
Epoch 9/20 1500/1500 [==============================] - 2s - loss: 0.8665 - acc: 0.6640 - val_loss: 0.6639 - val_acc: 0.7920
Epoch 10/20 1500/1500 [==============================] - 2s - loss: 0.8039 - acc: 0.6913 - val_loss: 0.5786 - val_acc: 0.8074
Epoch 11/20 1500/1500 [==============================] - 2s - loss: 0.7628 - acc: 0.7100 - val_loss: 0.5141 - val_acc: 0.8302
Epoch 12/20 1500/1500 [==============================] - 2s - loss: 0.6693 - acc: 0.7347 - val_loss: 0.4649 - val_acc: 0.8514
Epoch 13/20 1500/1500 [==============================] - 2s - loss: 0.6248 - acc: 0.7673 - val_loss: 0.4293 - val_acc: 0.8662
Epoch 14/20 1500/1500 [==============================] - 2s - loss: 0.6218 - acc: 0.7607 - val_loss: 0.3977 - val_acc: 0.8784
Epoch 15/20 1500/1500 [==============================] - 2s - loss: 0.5786 - acc: 0.7773 - val_loss: 0.3700 - val_acc: 0.8878
Epoch 16/20 1500/1500 [==============================] - 2s - loss: 0.5315 - acc: 0.7940 - val_loss: 0.3461 - val_acc: 0.8954
Epoch 17/20 1500/1500 [==============================] - 2s - loss: 0.5308 - acc: 0.8093 - val_loss: 0.3285 - val_acc: 0.8992
Epoch 18/20 1500/1500 [==============================] - 2s - loss: 0.4867 - acc: 0.8140 - val_loss: 0.3170 - val_acc: 0.8988
Epoch 19/20 1500/1500 [==============================] - 2s - loss: 0.4672 - acc: 0.8240 - val_loss: 0.3046 - val_acc: 0.9032
Epoch 20/20 1500/1500 [==============================] - 2s - loss: 0.4535 - acc: 0.8360 - val_loss: 0.2912 - val_acc: 0.9076
Test accuracy: 0.9076
CPU times: user 5min 53s, sys: 1min, total: 6min 53s
Wall time: 45.7 s
epochs = 50
model.fit(x_train_last5, y_train_last5,
          batch_size=batch_size, epochs=epochs,
          verbose=1,
          validation_data=(x_test_last5, y_test_last5))
score = model.evaluate(x_test_last5, y_test_last5, verbose=0)
print('Test accuracy:', score[1])
Train on 1500 samples, validate on 5000 samples
Epoch 1/50 1500/1500 [==============================] - 2s - loss: 0.4307 - acc: 0.8513 - val_loss: 0.2803 - val_acc: 0.9090
Epoch 2/50 1500/1500 [==============================] - 2s - loss: 0.4408 - acc: 0.8440 - val_loss: 0.2722 - val_acc: 0.9114
Epoch 3/50 1500/1500 [==============================] - 2s - loss: 0.4195 - acc: 0.8480 - val_loss: 0.2664 - val_acc: 0.9148
Epoch 4/50 1500/1500 [==============================] - 2s - loss: 0.3875 - acc: 0.8740 - val_loss: 0.2601 - val_acc: 0.9154
Epoch 5/50 1500/1500 [==============================] - 2s - loss: 0.3899 - acc: 0.8640 - val_loss: 0.2514 - val_acc: 0.9162
Epoch 6/50 1500/1500 [==============================] - 2s - loss: 0.3879 - acc: 0.8640 - val_loss: 0.2444 - val_acc: 0.9170
Epoch 7/50 1500/1500 [==============================] - 2s - loss: 0.3589 - acc: 0.8673 - val_loss: 0.2394 - val_acc: 0.9186
Epoch 8/50 1500/1500 [==============================] - 2s - loss: 0.3592 - acc: 0.8740 - val_loss: 0.2357 - val_acc: 0.9172
Epoch 9/50 1500/1500 [==============================] - 2s - loss: 0.3453 - acc: 0.8853 - val_loss: 0.2351 - val_acc: 0.9186
Epoch 10/50 1500/1500 [==============================] - 2s - loss: 0.3304 - acc: 0.8807 - val_loss: 0.2327 - val_acc: 0.9198
Epoch 11/50 1500/1500 [==============================] - 2s - loss: 0.3344 - acc: 0.8887 - val_loss: 0.2252 - val_acc: 0.9214
Epoch 12/50 1500/1500 [==============================] - 2s - loss: 0.3310 - acc: 0.8887 - val_loss: 0.2211 - val_acc: 0.9218
Epoch 13/50 1500/1500 [==============================] - 2s - loss: 0.3226 - acc: 0.8873 - val_loss: 0.2193 - val_acc: 0.9220
Epoch 14/50 1500/1500 [==============================] - 2s - loss: 0.3141 - acc: 0.8860 - val_loss: 0.2190 - val_acc: 0.9232
Epoch 15/50 1500/1500 [==============================] - 2s - loss: 0.3391 - acc: 0.8807 - val_loss: 0.2181 - val_acc: 0.9248
Epoch 16/50 1500/1500 [==============================] - 2s - loss: 0.3063 - acc: 0.8920 - val_loss: 0.2171 - val_acc: 0.9250
Epoch 17/50 1500/1500 [==============================] - 2s - loss: 0.3034 - acc: 0.8973 - val_loss: 0.2180 - val_acc: 0.9248
Epoch 18/50 1500/1500 [==============================] - 2s - loss: 0.2917 - acc: 0.9033 - val_loss: 0.2163 - val_acc: 0.9240
Epoch 19/50 1500/1500 [==============================] - 2s - loss: 0.2776 - acc: 0.9020 - val_loss: 0.2128 - val_acc: 0.9264
Epoch 20/50 1500/1500 [==============================] - 2s - loss: 0.2771 - acc: 0.9007 - val_loss: 0.2089 - val_acc: 0.9268
Epoch 21/50 1500/1500 [==============================] - 2s - loss: 0.2881 - acc: 0.8913 - val_loss: 0.2073 - val_acc: 0.9284
Epoch 22/50 1500/1500 [==============================] - 2s - loss: 0.2957 - acc: 0.9007 - val_loss: 0.2070 - val_acc: 0.9286
Epoch 23/50 1500/1500 [==============================] - 2s - loss: 0.2795 - acc: 0.9033 - val_loss: 0.2052 - val_acc: 0.9290
Epoch 24/50 1500/1500 [==============================] - 2s - loss: 0.2943 - acc: 0.9080 - val_loss: 0.2023 - val_acc: 0.9290
Epoch 25/50 1500/1500 [==============================] - 2s - loss: 0.2688 - acc: 0.9087 - val_loss: 0.2003 - val_acc: 0.9298
Epoch 26/50 1500/1500 [==============================] - 2s - loss: 0.2647 - acc: 0.9167 - val_loss: 0.1985 - val_acc: 0.9320
Epoch 27/50 1500/1500 [==============================] - 2s - loss: 0.2593 - acc: 0.9007 - val_loss: 0.1977 - val_acc: 0.9306
Epoch 28/50 1500/1500 [==============================] - 2s - loss: 0.2669 - acc: 0.9067 - val_loss: 0.1969 - val_acc: 0.9320
Epoch 29/50 1500/1500 [==============================] - 2s - loss: 0.2714 - acc: 0.9027 - val_loss: 0.1983 - val_acc: 0.9332
Epoch 30/50 1500/1500 [==============================] - 2s - loss: 0.2458 - acc: 0.9173 - val_loss: 0.1993 - val_acc: 0.9312
Epoch 31/50 1500/1500 [==============================] - 2s - loss: 0.2738 - acc: 0.9047 - val_loss: 0.1952 - val_acc: 0.9340
Epoch 32/50 1500/1500 [==============================] - 2s - loss: 0.2461 - acc: 0.9247 - val_loss: 0.1907 - val_acc: 0.9358
Epoch 33/50 1500/1500 [==============================] - 2s - loss: 0.2517 - acc: 0.9140 - val_loss: 0.1882 - val_acc: 0.9358
Epoch 34/50 1500/1500 [==============================] - 2s - loss: 0.2492 - acc: 0.9213 - val_loss: 0.1868 - val_acc: 0.9354
Epoch 35/50 1500/1500 [==============================] - 2s - loss: 0.2420 - acc: 0.9107 - val_loss: 0.1868 - val_acc: 0.9346
Epoch 36/50 1500/1500 [==============================] - 2s - loss: 0.2408 - acc: 0.9127 - val_loss: 0.1892 - val_acc: 0.9352
Epoch 37/50 1500/1500 [==============================] - 2s - loss: 0.2458 - acc: 0.9127 - val_loss: 0.1913 - val_acc: 0.9350
Epoch 38/50 1500/1500 [==============================] - 2s - loss: 0.2278 - acc: 0.9213 - val_loss: 0.1902 - val_acc: 0.9350
Epoch 39/50 1500/1500 [==============================] - 2s - loss: 0.2340 - acc: 0.9253 - val_loss: 0.1869 - val_acc: 0.9358
Epoch 40/50 1500/1500 [==============================] - 2s - loss: 0.2368 - acc: 0.9240 - val_loss: 0.1853 - val_acc: 0.9360
Epoch 41/50 1500/1500 [==============================] - 2s - loss: 0.2294 - acc: 0.9220 - val_loss: 0.1853 - val_acc: 0.9370
Epoch 42/50 1500/1500 [==============================] - 2s - loss: 0.2355 - acc: 0.9193 - val_loss: 0.1861 - val_acc: 0.9382
Epoch 43/50 1500/1500 [==============================] - 2s - loss: 0.2241 - acc: 0.9227 - val_loss: 0.1830 - val_acc: 0.9372
Epoch 44/50 1500/1500 [==============================] - 2s - loss: 0.2098 - acc: 0.9300 - val_loss: 0.1811 - val_acc: 0.9388
Epoch 45/50 1500/1500 [==============================] - 2s - loss: 0.2022 - acc: 0.9313 - val_loss: 0.1825 - val_acc: 0.9384
Epoch 46/50 1500/1500 [==============================] - 2s - loss: 0.2229 - acc: 0.9173 - val_loss: 0.1807 - val_acc: 0.9392
Epoch 47/50 1500/1500 [==============================] - 2s - loss: 0.2264 - acc: 0.9173 - val_loss: 0.1791 - val_acc: 0.9402
Epoch 48/50 1500/1500 [==============================] - 2s - loss: 0.2038 - acc: 0.9267 - val_loss: 0.1815 - val_acc: 0.9388
Epoch 49/50 1500/1500 [==============================] - 2s - loss: 0.2175 - acc: 0.9167 - val_loss: 0.1804 - val_acc: 0.9404
Epoch 50/50 1500/1500 [==============================] - 2s - loss: 0.1986 - acc: 0.9287 - val_loss: 0.1768 - val_acc: 0.9406
Test accuracy: 0.9406